package test.spider;

import java.util.List;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.regex.Pattern;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

public class WebMagic {
	
	static CopyOnWriteArraySet<String> hashlinks = new CopyOnWriteArraySet<>();
	
	public static void main(String[] args) {
		us.codecraft.webmagic.Spider.create(new PageProcessor() {
			
			@Override
			public void process(Page page) {
				List<String> links = page.getHtml().links().all();
				for(String link : links) {
					if(link.startsWith("/")) link = "http://licai.51credit.com" + link;
					if(!link.startsWith("http://licai.51credit.com")) continue;
					
					if(hashlinks.contains(link)) continue;
					else hashlinks.add(link);
					
					if(link.matches("http://licai.51credit.com/article/detail/.+")) {
						System.out.println(link);
					} else {
						page.addTargetRequest(link);
					}
				}
			}
			
			
			@Override
			public Site getSite() {
				return Site.me();
			}
		}).addUrl("http://licai.51credit.com")
			.thread(Runtime.getRuntime().availableProcessors())
			.run();;
	}

}
